#‘Metabolism of A Noodle Restaurant about Food Loss and Food Waste: Micro-Level Material Flow Model and Tobit Regression Analysis’

Library

Import Data from file

Check imported data

# Preview the first rows of the imported data frame
head(df)
##         date day is_closed food_loss_kg food_waste_kg solid_waste_kg
## 1 2022-09-16 Fri     FALSE         9.50          6.55           2.50
## 2 2022-09-17 Sat     FALSE        12.25          2.80           0.60
## 3 2022-09-18 Sun     FALSE         6.50          3.25           0.85
## 4 2022-09-20 Tue     FALSE        13.10          0.70           0.30
## 5 2022-09-21 Wed     FALSE         5.70          1.10           0.45
## 6 2022-09-22 Thu     FALSE         7.25          0.80           0.35
##   liquid_waste_kg customers fulls halfs takeouts liquors   sales container
## 1            4.05        42    36     4       15       2 1080.48         0
## 2            2.20        42    30     6       12       2  861.76         0
## 3            2.40        27    24     2       10       1  629.49         0
## 4            0.40        13    10     2       12       4  635.33         0
## 5            0.65        15    10     3       10       1  533.32         0
## 6            0.45        14    10     2       16       1  680.46         0
##   temp_c humi_p prcp_mm TS_noodle_kg TS_water_kg TS_bones_kg TS_veg_kg
## 1   9.04   89.5     4.1       -7.950     -34.450     -8.7450    -4.982
## 2   7.00   92.5     1.0       -6.750     -29.250     -7.4250    -4.230
## 3   9.61   81.1     0.0       -5.250     -22.750     -5.7750    -3.290
## 4   5.66   74.1     0.0       -3.450     -14.950     -3.7950    -2.162
## 5   7.35   76.7     0.0       -3.225     -13.975     -3.5475    -2.021
## 6  10.78   66.7     0.0       -4.050     -17.550     -4.4550    -2.538
##   TS_meat_kg TS_condi_kg   TS_Broth_kg TS_Stock_kg TS_FL_kg TS_FL_bone_kg
## 1      -2.12     -0.7950  1.029300e-15       29.68   11.342       -8.7450
## 2      -1.80     -0.6750 -1.110223e-15       25.20    9.630       -7.4250
## 3      -1.40     -0.5250  1.110223e-15       19.60    7.490       -5.7750
## 4      -0.92     -0.3450 -7.569254e-16       12.88    4.922       -3.7950
## 5      -0.86     -0.3225 -1.514838e-16       12.04    4.601       -3.5475
## 6      -1.08     -0.4050  3.128362e-16       15.12    5.778       -4.4550
##   TS_FL_veg_kg TS_FL_meat_kg TS_FP_kg FL_noodle_kg FL_water_kg FL_bones_kg
## 1       -2.332       -0.2650    47.70    -6.658879   -28.85514   -7.324766
## 2       -1.980       -0.2250    40.50    -8.586449   -37.20794   -9.445093
## 3       -1.540       -0.1750    31.50    -4.556075   -19.74299   -5.011682
## 4       -1.012       -0.1150    20.70    -9.182243   -39.78972  -10.100467
## 5       -0.946       -0.1075    19.35    -3.995327   -17.31308   -4.394860
## 6       -1.188       -0.1350    24.30    -5.081776   -22.02103   -5.589953
##   FL_veg_kg FL_meat_kg FL_condi_kg   FL_Broth_kg FL_Stock_kg FL_FL_kg
## 1 -4.172897  -1.775701  -0.6658879 -1.332268e-15    24.85981     9.50
## 2 -5.380841  -2.289720  -0.8586449  1.554312e-15    32.05607    12.25
## 3 -2.855140  -1.214953  -0.4556075  1.332268e-15    17.00935     6.50
## 4 -5.754206  -2.448598  -0.9182243  1.373771e-15    34.28037    13.10
## 5 -2.503738  -1.065421  -0.3995327  8.674266e-16    14.91589     5.70
## 6 -3.184579  -1.355140  -0.5081776 -1.110223e-15    18.97196     7.25
##   FL_FL_bone_kg FL_FL_veg_kg FL_FL_meat_kg FL_FP_kg Broth_diff Final_Prod_diff
## 1     -7.324766    -1.953271    -0.2219626 39.95327  -4.820187       -7.746729
## 2     -9.445093    -2.518692    -0.2862150 51.51869   6.856075       11.018692
## 3     -5.011682    -1.336449    -0.1518692 27.33645  -2.590654       -4.163551
## 4    -10.100467    -2.693458    -0.3060748 55.09346  21.400374       34.393458
## 5     -4.394860    -1.171963    -0.1331776 23.97196   2.875888        4.621963
## 6     -5.589953    -1.490654    -0.1693925 30.49065   3.851963        6.190654
##   daily_total_served tueD wedD thuD friD satD tueE wedE thuE friE satE
## 1              47.70    0    0    0    1    0    0    0    0    1    0
## 2              40.50    0    0    0    0    1    0    0    0    0    1
## 3              31.50    0    0    0    0    0   -1   -1   -1   -1   -1
## 4              20.70    1    0    0    0    0    1    0    0    0    0
## 5              19.35    0    1    0    0    0    0    1    0    0    0
## 6              24.30    0    0    1    0    0    0    0    1    0    0
# Show the structure (column names, types, leading values) of the data frame
str(df)
## 'data.frame':    169 obs. of  56 variables:
##  $ date              : chr  "2022-09-16" "2022-09-17" "2022-09-18" "2022-09-20" ...
##  $ day               : chr  "Fri" "Sat" "Sun" "Tue" ...
##  $ is_closed         : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ food_loss_kg      : num  9.5 12.2 6.5 13.1 5.7 ...
##  $ food_waste_kg     : num  6.55 2.8 3.25 0.7 1.1 0.8 1.5 2.65 2.55 2.2 ...
##  $ solid_waste_kg    : num  2.5 0.6 0.85 0.3 0.45 0.35 0.65 0.7 0.8 0.8 ...
##  $ liquid_waste_kg   : num  4.05 2.2 2.4 0.4 0.65 0.45 0.85 1.95 1.75 1.4 ...
##  $ customers         : int  42 42 27 13 15 14 12 35 24 26 ...
##  $ fulls             : int  36 30 24 10 10 10 11 35 18 25 ...
##  $ halfs             : int  4 6 2 2 3 2 2 2 3 3 ...
##  $ takeouts          : int  15 12 10 12 10 16 28 23 25 13 ...
##  $ liquors           : int  2 2 1 4 1 1 2 3 6 3 ...
##  $ sales             : num  1080 862 629 635 533 ...
##  $ container         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ temp_c            : num  9.04 7 9.61 5.66 7.35 ...
##  $ humi_p            : num  89.5 92.5 81.1 74.1 76.7 66.7 75.6 71.3 70.1 74.7 ...
##  $ prcp_mm           : num  4.1 1 0 0 0 0 0 0 0 0 ...
##  $ TS_noodle_kg      : num  -7.95 -6.75 -5.25 -3.45 -3.23 ...
##  $ TS_water_kg       : num  -34.5 -29.2 -22.8 -14.9 -14 ...
##  $ TS_bones_kg       : num  -8.74 -7.42 -5.78 -3.79 -3.55 ...
##  $ TS_veg_kg         : num  -4.98 -4.23 -3.29 -2.16 -2.02 ...
##  $ TS_meat_kg        : num  -2.12 -1.8 -1.4 -0.92 -0.86 -1.08 -1.6 -2.36 -1.78 -1.58 ...
##  $ TS_condi_kg       : num  -0.795 -0.675 -0.525 -0.345 -0.323 ...
##  $ TS_Broth_kg       : num  1.03e-15 -1.11e-15 1.11e-15 -7.57e-16 -1.51e-16 ...
##  $ TS_Stock_kg       : num  29.7 25.2 19.6 12.9 12 ...
##  $ TS_FL_kg          : num  11.34 9.63 7.49 4.92 4.6 ...
##  $ TS_FL_bone_kg     : num  -8.74 -7.42 -5.78 -3.79 -3.55 ...
##  $ TS_FL_veg_kg      : num  -2.332 -1.98 -1.54 -1.012 -0.946 ...
##  $ TS_FL_meat_kg     : num  -0.265 -0.225 -0.175 -0.115 -0.107 ...
##  $ TS_FP_kg          : num  47.7 40.5 31.5 20.7 19.4 ...
##  $ FL_noodle_kg      : num  -6.66 -8.59 -4.56 -9.18 -4 ...
##  $ FL_water_kg       : num  -28.9 -37.2 -19.7 -39.8 -17.3 ...
##  $ FL_bones_kg       : num  -7.32 -9.45 -5.01 -10.1 -4.39 ...
##  $ FL_veg_kg         : num  -4.17 -5.38 -2.86 -5.75 -2.5 ...
##  $ FL_meat_kg        : num  -1.78 -2.29 -1.21 -2.45 -1.07 ...
##  $ FL_condi_kg       : num  -0.666 -0.859 -0.456 -0.918 -0.4 ...
##  $ FL_Broth_kg       : num  -1.33e-15 1.55e-15 1.33e-15 1.37e-15 8.67e-16 ...
##  $ FL_Stock_kg       : num  24.9 32.1 17 34.3 14.9 ...
##  $ FL_FL_kg          : num  9.5 12.2 6.5 13.1 5.7 ...
##  $ FL_FL_bone_kg     : num  -7.32 -9.45 -5.01 -10.1 -4.39 ...
##  $ FL_FL_veg_kg      : num  -1.95 -2.52 -1.34 -2.69 -1.17 ...
##  $ FL_FL_meat_kg     : num  -0.222 -0.286 -0.152 -0.306 -0.133 ...
##  $ FL_FP_kg          : num  40 51.5 27.3 55.1 24 ...
##  $ Broth_diff        : num  -4.82 6.86 -2.59 21.4 2.88 ...
##  $ Final_Prod_diff   : num  -7.75 11.02 -4.16 34.39 4.62 ...
##  $ daily_total_served: num  47.7 40.5 31.5 20.7 19.4 ...
##  $ tueD              : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ wedD              : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ thuD              : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ friD              : int  1 0 0 0 0 0 1 0 0 0 ...
##  $ satD              : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ tueE              : int  0 0 -1 1 0 0 0 0 -1 1 ...
##  $ wedE              : int  0 0 -1 0 1 0 0 0 -1 0 ...
##  $ thuE              : int  0 0 -1 0 0 1 0 0 -1 0 ...
##  $ friE              : int  1 0 -1 0 0 0 1 0 -1 0 ...
##  $ satE              : int  0 1 -1 0 0 0 0 1 -1 0 ...
# List all column names of the data frame
names(df)
##  [1] "date"               "day"                "is_closed"         
##  [4] "food_loss_kg"       "food_waste_kg"      "solid_waste_kg"    
##  [7] "liquid_waste_kg"    "customers"          "fulls"             
## [10] "halfs"              "takeouts"           "liquors"           
## [13] "sales"              "container"          "temp_c"            
## [16] "humi_p"             "prcp_mm"            "TS_noodle_kg"      
## [19] "TS_water_kg"        "TS_bones_kg"        "TS_veg_kg"         
## [22] "TS_meat_kg"         "TS_condi_kg"        "TS_Broth_kg"       
## [25] "TS_Stock_kg"        "TS_FL_kg"           "TS_FL_bone_kg"     
## [28] "TS_FL_veg_kg"       "TS_FL_meat_kg"      "TS_FP_kg"          
## [31] "FL_noodle_kg"       "FL_water_kg"        "FL_bones_kg"       
## [34] "FL_veg_kg"          "FL_meat_kg"         "FL_condi_kg"       
## [37] "FL_Broth_kg"        "FL_Stock_kg"        "FL_FL_kg"          
## [40] "FL_FL_bone_kg"      "FL_FL_veg_kg"       "FL_FL_meat_kg"     
## [43] "FL_FP_kg"           "Broth_diff"         "Final_Prod_diff"   
## [46] "daily_total_served" "tueD"               "wedD"              
## [49] "thuD"               "friD"               "satD"              
## [52] "tueE"               "wedE"               "thuE"              
## [55] "friE"               "satE"

Univariable

Open days

# sample size: open and closed days ---------------------------------------
# `is_closed` is TRUE on closed days, so open days are counted through its
# negation and closed days through the flag itself.
data.frame(obs_days    = nrow(df),
           open_days   = sum(!df$is_closed),
           closed_days = sum(df$is_closed))
##   obs_days open_days closed_days
## 1      169       161           8
# Frequency table of the closed-day flag (FALSE = open, TRUE = closed)
freq_table(df, is_closed)
## # A tibble: 2 × 3
##   is_closed     n  prop
##   <lgl>     <int> <dbl>
## 1 FALSE       161  95.3
## 2 TRUE          8   4.7
# List the date and weekday of every closed day
subset(df, is_closed, select = c(date, day, is_closed))
##           date day is_closed
## 21  2022-10-09 Sun      TRUE
## 48  2022-11-10 Thu      TRUE
## 49  2022-11-11 Fri      TRUE
## 66  2022-12-01 Thu      TRUE
## 86  2022-12-24 Sat      TRUE
## 87  2022-12-25 Sun      TRUE
## 93  2023-01-01 Sun      TRUE
## 159 2023-03-19 Sun      TRUE

Basic Summary of Dependent Variables

# basic summary: dependents ----------------------------------------------------
# Six-number summaries (min, quartiles, mean, max) of each dependent variable,
# one column per variable, computed over all observed days.
with(df, data.frame(
  food_loss_waste   = c(summary(food_loss_kg + food_waste_kg)),
  food_loss         = c(summary(food_loss_kg)),
  food_waste_all    = c(summary(food_waste_kg)),
  food_waste_liquid = c(summary(liquid_waste_kg)),
  food_waste_solid  = c(summary(solid_waste_kg))
))
##         food_loss_waste food_loss food_waste_all food_waste_liquid
## Min.           0.000000  0.000000       0.000000          0.000000
## 1st Qu.        8.250000  6.600000       0.950000          0.550000
## Median         9.500000  7.300000       1.950000          1.400000
## Mean           9.543491  7.460355       2.083136          1.408876
## 3rd Qu.       11.050000  8.150000       2.900000          2.000000
## Max.          17.900000 13.800000       6.550000          4.500000
##         food_waste_solid
## Min.           0.0000000
## 1st Qu.        0.3500000
## Median         0.6000000
## Mean           0.6742604
## 3rd Qu.        0.9000000
## Max.           2.9500000
# Extended summary statistics of the four dependent variables (all days)
get_summary_stats(
  select(df, food_loss_kg, food_waste_kg, liquid_waste_kg, solid_waste_kg)
)
## # A tibble: 4 × 13
##   variable        n   min   max median    q1    q3   iqr   mad  mean    sd    se
##   <fct>       <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 food_loss_…   169     0 13.8    7.3   6.6   8.15  1.55 1.19  7.46   2.69 0.207
## 2 food_waste…   169     0  6.55   1.95  0.95  2.9   1.95 1.48  2.08   1.45 0.111
## 3 liquid_was…   169     0  4.5    1.4   0.55  2     1.45 1.04  1.41   1.02 0.079
## 4 solid_wast…   169     0  2.95   0.6   0.35  0.9   0.55 0.445 0.674  0.51 0.039
## # ℹ 1 more variable: ci <dbl>
library(summarytools)
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
## system might not have X11 capabilities; in case of errors when using dfSummary(), set st_options(use.x11 = FALSE)
## 
## Attaching package: 'summarytools'
## The following object is masked from 'package:tibble':
## 
##     view
# Descriptive statistics of the four dependent variables (all days), keeping
# the column order as selected and reporting six decimal places
df %>%
  select(c(food_loss_kg,food_waste_kg,
           liquid_waste_kg,solid_waste_kg)) %>% 
  descr(order = "preserve",
        stats = c('mean', 'sd', 'min', 'q1', 'med', 'q3', 'max'),
        round.digits = 6)
## Descriptive Statistics  
## df  
## N: 169  
## 
##                 food_loss_kg   food_waste_kg   liquid_waste_kg   solid_waste_kg
## ------------- -------------- --------------- ----------------- ----------------
##          Mean       7.460355        2.083136          1.408876         0.674260
##       Std.Dev       2.693018        1.445795          1.021296         0.509818
##           Min       0.000000        0.000000          0.000000         0.000000
##            Q1       6.600000        0.950000          0.550000         0.350000
##        Median       7.300000        1.950000          1.400000         0.600000
##            Q3       8.150000        2.900000          2.000000         0.900000
##           Max      13.800000        6.550000          4.500000         2.950000
# basic summary: dependents excluding closed days ------------------------------
# Same six-number summaries as above, restricted to rows where the restaurant
# was open.
with(df[!df$is_closed, ], data.frame(
  food_loss_waste   = c(summary(food_loss_kg + food_waste_kg)),
  food_loss         = c(summary(food_loss_kg)),
  food_waste_all    = c(summary(food_waste_kg)),
  food_waste_liquid = c(summary(liquid_waste_kg)),
  food_waste_solid  = c(summary(solid_waste_kg))
))
##         food_loss_waste food_loss food_waste_all food_waste_liquid
## Min.             0.0000  0.000000       0.000000          0.000000
## 1st Qu.          8.4000  6.700000       1.100000          0.650000
## Median           9.6500  7.350000       2.100000          1.500000
## Mean            10.0177  7.831056       2.186646          1.478882
## 3rd Qu.         11.1500  8.400000       2.950000          2.050000
## Max.            17.9000 13.800000       6.550000          4.500000
##         food_waste_solid
## Min.            0.000000
## 1st Qu.         0.350000
## Median          0.650000
## Mean            0.707764
## 3rd Qu.         0.950000
## Max.            2.950000
# Extended summary statistics of the four dependent variables (open days only)
df %>%
  filter(!is_closed) %>%
  select(food_loss_kg, food_waste_kg, liquid_waste_kg, solid_waste_kg) %>%
  get_summary_stats()
## # A tibble: 4 × 13
##   variable        n   min   max median    q1    q3   iqr   mad  mean    sd    se
##   <fct>       <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 food_loss_…   161     0 13.8    7.35  6.7   8.4   1.7  1.11  7.83  2.17  0.171
## 2 food_waste…   161     0  6.55   2.1   1.1   2.95  1.85 1.33  2.19  1.40  0.111
## 3 liquid_was…   161     0  4.5    1.5   0.65  2.05  1.4  1.04  1.48  0.995 0.078
## 4 solid_wast…   161     0  2.95   0.65  0.35  0.95  0.6  0.445 0.708 0.499 0.039
## # ℹ 1 more variable: ci <dbl>
# summary of all observed days (open and closed) --------------------
# 1. number of observations
# 2. Averages
# 3. standard deviations
# 4. Min values
# 5. Max values
# Columns 4:7 are the four dependent variables; the table is also written
# to "deps1.txt".
stargazer(df[4:7], flip = TRUE,
          type = "text", digits = 2, out = "deps1.txt")
## 
## ===================================================================
## Statistic food_loss_kg food_waste_kg solid_waste_kg liquid_waste_kg
## -------------------------------------------------------------------
## N             169           169           169             169      
## Mean          7.46         2.08           0.67           1.41      
## St. Dev.      2.69         1.45           0.51           1.02      
## Min           0.00         0.00           0.00           0.00      
## Max          13.80         6.55           2.95           4.50      
## -------------------------------------------------------------------
# Excluding the restaurant closed ---------------
# Same summary table restricted to open days; also written to "deps2.txt".
stargazer(df[df$is_closed %in% FALSE, 4:7],
          flip = TRUE,
          type = "text",
          digits = 2,
          out = "deps2.txt")
## 
## ===================================================================
## Statistic food_loss_kg food_waste_kg solid_waste_kg liquid_waste_kg
## -------------------------------------------------------------------
## N             161           161           161             161      
## Mean          7.83         2.19           0.71           1.48      
## St. Dev.      2.17         1.40           0.50           1.00      
## Min           0.00         0.00           0.00           0.00      
## Max          13.80         6.55           2.95           4.50      
## -------------------------------------------------------------------

Histograms

Normal histogram

# Collect the closed-day flag and the four dependent variables, reshape them
# to long format (one row per day and variable), and attach each variable's
# mean and median so they can be drawn as reference lines.
dep_features <- df %>%
  select(c(is_closed, food_loss_kg, food_waste_kg,
           solid_waste_kg, liquid_waste_kg)) %>%
  pivot_longer(!is_closed, names_to = "features",
               values_to = "values") %>%
  group_by(features) %>%
  mutate(Mean = mean(values),
         Median = median(values)) %>%
  # Drop the grouping so later operations on dep_features are not
  # silently performed per feature.
  ungroup()


# Plot a histogram for each feature, one facet per variable with free scales
dep_features %>%
  ggplot() +
  geom_histogram(aes(x = values, fill = features),
                 bins = 100, alpha = 0.7, show.legend = FALSE) +
  facet_wrap(~ features, scales = 'free') +
  paletteer::scale_fill_paletteer_d("ggthemes::excel_Parallax") +
  # Add lines for mean and median
  geom_vline(aes(xintercept = Mean, color = "Mean"),
             linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = Median, color = "Median"),
             linetype = "dashed", linewidth = 1) +
  scale_color_manual(name = "",
                     values = c(Mean = "red", Median = "yellow"))

Histogram with density

# Density-scaled histograms (30 bins) with an overlaid kernel density curve,
# one per dependent variable, using open days only.
# An explicit bin width was considered but is not used:
# binwidth = bw
# bw <- 2 * IQR(df$food_loss_kg) / length(df$food_loss_kg)^(1/3)

# Food loss + food waste -------------------------------------------------------
hist_loss_waste <- df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(x = food_loss_kg + food_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Loss and Food Waste - Histogram")
hist_loss_waste

# Food loss --------------------------------------------------------------------
hist_loss <- df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(x = food_loss_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Loss - Histogram")
hist_loss

# Food waste -------------------------------------------------------------------
hist_food_waste <- df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(x = food_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Waste - Histogram")
hist_food_waste

# Solid food waste -------------------------------------------------------------
hist_solid_waste <- df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(x = solid_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Solid Food Waste - Histogram")
hist_solid_waste

# Liquid food waste ------------------------------------------------------------
hist_liquid_waste <- df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(x = liquid_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Liquid Food Waste - Histogram")
hist_liquid_waste

# Show all five histograms together
grid.arrange(hist_loss_waste, hist_loss,
             hist_food_waste, hist_solid_waste, hist_liquid_waste)

### Q-Q plot

# Normal Q-Q plots (points plus reference line) for each dependent variable,
# using open days only.

# Food loss ------------
df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(sample = food_loss_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Food Loss in kg")

# Food waste ------------
df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(sample = food_waste_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Food Waste in kg")

# Solid Food waste ------------
df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(sample = solid_waste_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Solid Food Waste in kg")

# Liquid Food waste ------------
df %>%
  filter(is_closed %in% FALSE) %>%
  ggplot(aes(sample = liquid_waste_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Liquid Food Waste in kg")

shapiro test

# Food waste ------------
# Shapiro-Wilk normality test on the three food-waste measures (open days only)
shapiro_test(
  filter(df, !is_closed),
  food_waste_kg, solid_waste_kg, liquid_waste_kg
)
## # A tibble: 3 × 3
##   variable        statistic             p
##   <chr>               <dbl>         <dbl>
## 1 food_waste_kg       0.952 0.0000260    
## 2 liquid_waste_kg     0.951 0.0000192    
## 3 solid_waste_kg      0.903 0.00000000783

From the output, all p-values are far below 0.05, implying that the distributions of food waste, solid food waste, and liquid food waste differ significantly from a normal distribution. In other words, we cannot assume normality.

Histogram per capita

# Per-customer histograms (open days only): each waste measure is divided by
# the day's customer count. The titles state "per Customer" so these plots
# cannot be confused with the absolute-quantity histograms of the same names.
# NOTE(review): a day with customers == 0 would yield NaN/Inf here - confirm
# that every open day has at least one customer.

# Histogram of food waste per customer -------------------------------
hist_food_waste <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x = food_waste_kg / customers)) +
  geom_histogram(aes(y = after_stat(density)), 
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  labs(title = "Food Waste per Customer - Histogram")
hist_food_waste

# Histogram of solid waste per customer ------------------------------
hist_solid_waste <- 
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = solid_waste_kg / customers)) +
  geom_histogram(aes(y = after_stat(density)), 
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  labs(title = "Solid Food Waste per Customer - Histogram")
hist_solid_waste

# Histogram of liquid waste per customer -----------------------------
hist_liquid_waste <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x = liquid_waste_kg / customers)) +
  geom_histogram(aes(y = after_stat(density)), 
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  labs(title = "Liquid Food Waste per Customer - Histogram")
hist_liquid_waste

# The first two panels are the absolute-quantity histograms created earlier;
# the last three are the per-customer versions defined in this section.
grid.arrange(hist_loss_waste, hist_loss,
             hist_food_waste, hist_solid_waste, hist_liquid_waste)

Q-Q plot per capita

library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:forecast':
## 
##     gghistogram
# Normal Q-Q plots of the per-customer waste measures (open days only).
# The ratio is computed over all rows and then restricted to open days.

# Food waste ------------
ggqqplot(with(df, (food_waste_kg / customers)[is_closed %in% FALSE])) +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Food Waste per Capita in kg")

# Solid Food waste ------------
ggqqplot(with(df, (solid_waste_kg / customers)[is_closed %in% FALSE])) +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Solid Food Waste per Capita in kg")

# Liquid Food waste ------------
ggqqplot(with(df, (liquid_waste_kg / customers)[is_closed %in% FALSE])) +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Liquid Food Waste per Capita in kg")

shapiro test for per capita

# Food waste ------------
# Shapiro-Wilk normality test on the per-customer waste measures.
# The ratios are derived first and closed days are dropped before testing.
df %>%
  mutate(
    food_waste_p_kg   = food_waste_kg / customers,
    solid_waste_p_kg  = solid_waste_kg / customers,
    liquid_waste_p_kg = liquid_waste_kg / customers
  ) %>%
  filter(!is_closed) %>%
  shapiro_test(food_waste_p_kg, solid_waste_p_kg, liquid_waste_p_kg)
## # A tibble: 3 × 3
##   variable          statistic        p
##   <chr>                 <dbl>    <dbl>
## 1 food_waste_p_kg       0.987 1.38e- 1
## 2 liquid_waste_p_kg     0.984 6.10e- 2
## 3 solid_waste_p_kg      0.863 6.24e-11

From the output, the p-value for solid food waste per customer is far below the significance level of 0.05, whereas the p-values for total food waste and liquid food waste per customer are not. This implies that the distribution of solid food waste per customer differs significantly from a normal distribution. In other words, normality cannot be rejected for food waste and liquid food waste per customer, but it is rejected for solid food waste per customer.

Time Series Plots

Daily Time Series

# Daily time-series plots over all observed days. `date` is stored as text,
# so it is converted with as.Date() for a date axis. Point shape encodes
# `is_closed`: shape 16 for FALSE (open) and shape 4 for TRUE (closed).
# NOTE(review): a numeric `legend.position` is deprecated in recent ggplot2
# releases in favour of `legend.position.inside` - confirm the installed
# version before changing it.

# Daily Plot on food loss + food waste ---------------------------------
daily_loss_waste <- 
  ggplot(data = df, aes(x = as.Date(date), y = food_loss_kg + food_waste_kg)) +
  geom_line(aes(group = 1), color="dark blue") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values=c(16, 4)) +
  scale_x_date(date_labels = "%b %d") +
  theme(legend.position = c(0.05,0.15)) +
  xlab("Date") + ylab("Daily Food Loss and Waste (kg)") +
  ggtitle("Daily Food Loss and Waste Trend")
daily_loss_waste

# Daily Plot on food loss ------------------------------------------------
daily_loss <- 
  ggplot(data = df, aes(x = as.Date(date), y = food_loss_kg)) +
  geom_line(color="blue") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = c(0.9,0.85)) +
  xlab("Date") + ylab("Daily Food Loss (kg)") +
  ggtitle("Daily Food Loss Trend")
daily_loss

# Daily Plot on food waste -----------------------------------------------
daily_waste <- 
  ggplot(data = df, aes(x = as.Date(date), y = food_waste_kg)) +
  geom_line(color="black") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = c(0.8,0.85)) +
  xlab("Date") + ylab("Daily Food Waste (kg)") +
  ggtitle("Daily Food Waste Trend")
daily_waste

# Daily Plot on solid food waste -----------------------------------------
daily_solid_waste <- 
  ggplot(data = df, aes(x = as.Date(date), y = solid_waste_kg)) +
  geom_line(color="dark orange") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = c(0.8,0.85)) +
  xlab("Date") + ylab("Daily Solid Food Waste (kg)") +
  ggtitle("Daily Solid Food Waste Trend")
daily_solid_waste

# Daily Plot on liquid food waste ----------------------------------------
# Two line layers are drawn on the same data: a solid dark-blue line with a
# dashed blue line on top of it.
daily_liquid_waste <- 
  ggplot(data = df, aes(x = as.Date(date), y = liquid_waste_kg)) +
  geom_line(color="dark blue") +
  geom_line(color="blue", linetype = "dashed") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = c(0.8,0.85)) +
  xlab("Date") + ylab("Daily Liquid Food Waste (kg)") +
  ggtitle("Daily Liquid Food Waste Trend")
daily_liquid_waste

# Show all five daily series together
grid.arrange(daily_loss_waste,daily_loss, daily_waste,
             daily_solid_waste,daily_liquid_waste)

# Monthly time-series plots: one facet row per `month_name`, with `day_name`
# on the x axis. Point shape encodes `is_closed` (16 = FALSE, 4 = TRUE) and
# the legend is hidden.
# NOTE(review): `day_name` and `month_name` are not among the columns listed
# by names(df) earlier in this document - confirm they are added to `df`
# before this section runs, otherwise these plots fail with
# "object not found".

# Monthly Plot on food loss and food waste ---------------------------------
monthly_loss_waste <- 
  ggplot(data = df, aes(x = day_name, 
                        y = food_loss_kg + food_waste_kg, group=1)) +
  geom_line(color="dark blue") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values=c(16, 4)) +
  theme(legend.position = "none") +
  # geom_rect(data = df, aes(xmin = date, xmax = dplyr::lead(date),
  #                          ymin = -Inf, ymax = Inf,
  #                          fill = factor(!is_closed)),  alpha = .3) +
  facet_grid(month_name~.) +
  xlab("Date") + ylab("Monthly Food Loss and Waste (kg)") +
  ggtitle("Monthly Food Loss and Waste Trend")
monthly_loss_waste

# Monthly Plot on food loss ------------------------------------------------
monthly_loss <- 
  ggplot(data = df, aes(x = day_name, y = food_loss_kg, group=1)) +
  geom_line(color="black") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name~.) +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = "none") +
  xlab("Date") + ylab("Monthly Food Loss (kg)") +
  ggtitle("Monthly Food Loss Trend")
monthly_loss

# Monthly Plot on food waste -----------------------------------------------
monthly_waste <- 
  ggplot(data = df, aes(x = day_name, y = food_waste_kg, group=1)) +
  geom_line(color="black") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name~.) +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = "none") +
  xlab("Date") + ylab("Monthly Food Waste (kg)") +
  ggtitle("Monthly Food Waste Trend")
monthly_waste

# Monthly Plot on solid food waste -----------------------------------------
monthly_solid_waste <- 
  ggplot(data = df, aes(x = day_name, y = solid_waste_kg, group=1)) +
  geom_line(color="dark orange") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name~.) +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = "none") +
  xlab("Date") + ylab("Monthly Solid Food Waste (kg)") +
  ggtitle("Monthly Solid Food Waste Trend")
monthly_solid_waste

# Monthly Plot on liquid food waste ----------------------------------------
monthly_liquid_waste <- 
  ggplot(data = df, aes(x = day_name, y = liquid_waste_kg, group=1)) +
  geom_line(color="blue") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name~.) +
  scale_shape_manual(values=c(16, 4))+
  theme(legend.position = "none") +
  xlab("Date") + ylab("Monthly Liquid Food Waste (kg)") +
  ggtitle("Monthly Liquid Food Waste Trend")
monthly_liquid_waste

# Combined view is currently disabled:
# grid.arrange(monthly_loss_waste,monthly_loss, monthly_waste,
#              monthly_solid_waste,monthly_liquid_waste)

Boxplots

# weekly boxplot on food loss + food waste ----------------------------
boxplot_week_loss_waste <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x=day, y=food_loss_kg + food_waste_kg)) + 
  geom_boxplot(outlier.shape=8, outlier.size=4) +
  stat_summary(fun=mean, geom="point", shape=16, size=3) +
  labs(title = "Boxplot of Food Loss and Food Waste in Day of the Week",
       x = "Week of Day", y = "Food Loss and Food Waste in kg")
boxplot_week_loss_waste

# weekly boxplot on food loss ----------------------------------
boxplot_week_food_loss <- 
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x=day, y=food_loss_kg)) + 
  geom_boxplot(outlier.shape=8, outlier.size=4) +
  stat_summary(fun=mean, geom="point", shape=16, size=3) +
  labs(title = "Boxplot of Food Loss in Day of the Week",
       x = "Week of Day", y = "Food Loss in kg")
boxplot_week_food_loss

# weekly boxplot on food waste ------------------------------------
boxplot_week_food_waste <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x=day, y=food_waste_kg)) + 
  geom_boxplot(outlier.shape=8, outlier.size=4) +
  stat_summary(fun=mean, geom="point", shape=16, size=3) +
  labs(title = "Boxplot of All Food Waste in Day of the Week",
       x = "Week of Day", y = "Food Waste in kg")
boxplot_week_food_waste

# weekly boxplot on solid food waste ------------------------------------
boxplot_week_solidWaste <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x=day, y=solid_waste_kg)) + 
  geom_boxplot(outlier.shape=8, outlier.size=4) +
  stat_summary(fun=mean, geom="point", shape=16, size=3) +
  labs(title = "Boxplot of Solid Food Waste in Day of the Week",
       x = "Week of Day", y = "Solid Food Waste in kg")
boxplot_week_solidWaste

# Weekly boxplot: liquid food waste, grouped by day of the week --------------
# Only rows whose is_closed flag is FALSE are plotted. stat_summary() adds a
# point at each group's mean on top of the box.
boxplot_week_liquidWaste <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x = day, y = liquid_waste_kg)) + 
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Liquid Food Waste in Day of the Week",
       # The x axis holds the `day` column, so the label is
       # "Day of the Week" (it previously read "Week of Day").
       x = "Day of the Week", y = "Liquid Food Waste in kg")
boxplot_week_liquidWaste

# Draw the four per-category weekly boxplots together on one page.
grid.arrange(
  grobs = list(
    boxplot_week_food_loss,
    boxplot_week_food_waste,
    boxplot_week_solidWaste,
    boxplot_week_liquidWaste
  )
)

# Monthly boxplot: food loss + food waste, grouped by month_name -------------
# Rows are kept only where is_closed is FALSE; a point marks each group mean.
boxplot_month_loss_waste <-
  ggplot(df[df$is_closed %in% FALSE, ],
         aes(month_name, food_loss_kg + food_waste_kg)) +
  geom_boxplot(outlier.size = 2, outlier.shape = 8) +
  stat_summary(geom = "point", fun = mean, size = 2, shape = 16) +
  ggtitle("Boxplot of Food Loss and Food Waste in Month") +
  xlab("Month") +
  ylab("Food Loss and Waste in kg")
boxplot_month_loss_waste

# Monthly boxplot: food loss, grouped by month_name --------------------------
# Only rows whose is_closed flag is FALSE are plotted. stat_summary() adds a
# point at each group's mean on top of the box.
boxplot_month_loss <- 
  ggplot(data = subset(df, is_closed %in% FALSE), 
         aes(x = month_name, y = food_loss_kg)) + 
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Food Loss in Month",
       # y is food_loss_kg, so the axis is labelled as food loss
       # (it previously read "Food Waste in kg").
       x = "Month", y = "Food Loss in kg")
boxplot_month_loss

# Monthly boxplot: daily food waste, grouped by month_name -------------------
# Rows are kept only where is_closed is FALSE; a point marks each group mean.
boxplot_month_waste <-
  ggplot(df[df$is_closed %in% FALSE, ], aes(month_name, food_waste_kg)) +
  geom_boxplot(outlier.size = 2, outlier.shape = 8) +
  stat_summary(geom = "point", fun = mean, size = 2, shape = 16) +
  ggtitle("Boxplot of Daily Food Waste in Month") +
  xlab("Month") +
  ylab("Food Waste in kg")
boxplot_month_waste

# Monthly boxplot: daily solid food waste, grouped by month_name -------------
# Only rows whose is_closed flag is FALSE are plotted. stat_summary() adds a
# point at each group's mean on top of the box.
boxplot_month_solidWaste <- 
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = solid_waste_kg)) + 
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Daily Solid Food Waste in Month",
       # Fixed the axis-label typo ("Monthy").
       x = "Month", y = "Solid Food Waste in kg")
boxplot_month_solidWaste

# Monthly boxplot: liquid food waste, grouped by month_name ------------------
# Rows are kept only where is_closed is FALSE; a point marks each group mean.
boxplot_month_liquidWaste <-
  ggplot(df[df$is_closed %in% FALSE, ], aes(month_name, liquid_waste_kg)) +
  geom_boxplot(outlier.size = 2, outlier.shape = 8) +
  stat_summary(geom = "point", fun = mean, size = 2, shape = 16) +
  ggtitle("Boxplot of Liquid Food Waste in Month") +
  xlab("Month") +
  ylab("Liquid Food Waste in kg")
boxplot_month_liquidWaste

# Draw all five monthly boxplots together on one page.
grid.arrange(
  grobs = list(
    boxplot_month_loss_waste,
    boxplot_month_loss,
    boxplot_month_waste,
    boxplot_month_solidWaste,
    boxplot_month_liquidWaste
  )
)

Time Series Plots for Independent Variables

## Time Series plots of:
# 1. weather conditions: temperature, humidity, precipitation
# 2. # orders + dine in + size + liquor + daily sales (confident)

# Time series: daily temperature (temp_c) ------------------------------------
# Points are the daily values, overlaid with a loess smooth and a dotted
# horizontal reference line at 22 (the same constant the gap-temperature
# plot subtracts from temp_c).
tsPlot_temp <- 
  ggplot(data = df, aes(x = as.Date(date), y = temp_c)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(aes(group = 1), color="orange") +
  # The constant intercept is passed as a parameter, not through aes():
  # mapping it as an aesthetic draws one identical line per row of df.
  geom_hline(yintercept = 22, linetype = "dotted") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Temperature in Degree Celsius") +
  ggtitle("Daily Average Hourly Temperature Plot")
tsPlot_temp
## `geom_smooth()` using formula = 'y ~ x'

# Time series: temperature gap, i.e. temp_c minus 22 -------------------------
# Daily points with a loess smooth; all rows of df are used.
tsPlot_temp_gap <-
  ggplot(df, aes(as.Date(date), temp_c - 22)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # Disabled alternative layer:
  # geom_line(color="green") +
  scale_x_date(date_labels = "%b %d") +
  labs(title = "Daily Gap Temperature Plot",
       x = "Date",
       y = "Gap Temperature in Degree Celsius")
tsPlot_temp_gap
## `geom_smooth()` using formula = 'y ~ x'

# Time series: daily humidity (humi_p) ---------------------------------------
# Daily points with a loess smooth; all rows of df are used.
tsPlot_humidity <-
  ggplot(df, aes(as.Date(date), humi_p)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # Disabled alternative layer:
  # geom_line(color="red") +
  scale_x_date(date_labels = "%b %d") +
  labs(title = "Daily Humidity Plot",
       x = "Date",
       y = "Humidity in Percent")
tsPlot_humidity
## `geom_smooth()` using formula = 'y ~ x'

# Time series: daily precipitation (prcp_mm) ---------------------------------
# Daily points with a loess smooth; all rows of df are used.
tsPlot_precip <-
  ggplot(df, aes(as.Date(date), prcp_mm)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # Disabled alternative layer:
  # geom_line(color="blue") +
  scale_x_date(date_labels = "%b %d") +
  labs(title = "Daily Precipitation Plot",
       x = "Date",
       y = "Precipitation in millimetre")
tsPlot_precip
## `geom_smooth()` using formula = 'y ~ x'

# Draw the four weather time-series plots together on one page.
grid.arrange(
  grobs = list(tsPlot_temp, tsPlot_temp_gap, tsPlot_humidity, tsPlot_precip)
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## Time Series plots of:
# 1. number of orders (full, half, takeouts)
# 2. daily dine in served (kg)
# 3. liquor
# 4. daily sales (confident)

# Time series: daily meal orders by package type -----------------------------
# One line per package column (fulls, halfs, takeouts) for rows where
# is_closed is FALSE. Colour is mapped to a constant label inside aes() so
# that the manual colour scale can build the legend.
tsPlot_total_orders <-
  ggplot(df[df$is_closed %in% FALSE, ], aes(as.Date(date))) +
  geom_line(aes(y = fulls, color = "fulls")) +
  geom_line(aes(y = halfs, color = "halfs")) +
  geom_line(aes(y = takeouts, color = "takeouts"), linetype = "dashed") +
  scale_x_date(date_labels = "%b %d") +
  scale_color_manual(
    name = "Packages",
    breaks = c("fulls", "halfs", "takeouts"),
    values = c("fulls" = "dark blue",
               "halfs" = "purple",
               "takeouts" = "dark red")
  ) +
  labs(title = "Daily Different Package Meal Orders Plot",
       x = "Date",
       y = "Daily Number of Meal Orders") +
  theme(legend.position = "right")
tsPlot_total_orders

# Time series: daily total served vs. production -----------------------------
# Draws daily_total_served (solid) and FL_FP_kg (dashed) for rows where
# is_closed is FALSE.
# Colour is mapped inside aes() to a constant label per series. A colour set
# as a fixed layer parameter is not a mapped aesthetic, so
# scale_color_manual() would have nothing to act on and no legend would
# be drawn.
tsPlot_D_S <- 
  ggplot(data = subset(df, is_closed %in% FALSE), aes(x = as.Date(date))) +
  geom_line(aes(y = daily_total_served, color = "daily_total_served")) +
  geom_line(aes(y = FL_FP_kg, color = "FL_FP_kg"), linetype = "dashed") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Daily Quantity in kg") +
  ggtitle("Daily Total Served and Production Plot") +
  scale_color_manual(name='Served',
                     breaks=c('daily_total_served', 'FL_FP_kg'),
                     values=c('daily_total_served' = 'dark blue',
                              'FL_FP_kg' = 'dark red')) +
  theme(legend.position = "right")
tsPlot_D_S

# Time series: daily_total_served minus FL_FP_kg -----------------------------
# The difference is mapped once at plot level and shared by the line layer
# and the loess smooth; only rows where is_closed is FALSE are used.
tsPlot_diff_D_S <-
  ggplot(df[df$is_closed %in% FALSE, ],
         aes(x = as.Date(date), y = daily_total_served - FL_FP_kg)) +
  geom_line(color = "black") +
  stat_smooth(method = "loess", color = "light green", fill = "light green") +
  scale_x_date(date_labels = "%b %d") +
  labs(title = "Difference Between Total Served and Production Plot",
       x = "Date",
       y = "Daily Inventory in kg")
tsPlot_diff_D_S
## `geom_smooth()` using formula = 'y ~ x'

# Time series: daily sales ---------------------------------------------------
# The sales column is mapped once at plot level and shared by the line layer
# and the loess smooth; only rows where is_closed is FALSE are used.
tsPlot_sales <-
  ggplot(df[df$is_closed %in% FALSE, ],
         aes(x = as.Date(date), y = sales)) +
  geom_line(color = "purple") +
  stat_smooth(method = "loess", color = "light green", fill = "light green") +
  scale_x_date(date_labels = "%b %d") +
  labs(title = "Daily Sales Plot",
       x = "Date",
       y = "Daily Sales in dollar")
tsPlot_sales
## `geom_smooth()` using formula = 'y ~ x'

# Draw the orders, served-vs-production, difference and sales plots together.
grid.arrange(
  grobs = list(tsPlot_total_orders, tsPlot_D_S, tsPlot_diff_D_S, tsPlot_sales)
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

(Partial and) Autocorrelation Function

## acf and pacf ----------------------------------------------------------------
# Builds one ACF plot (ggAcf) and one PACF plot (ggPacf) per series. The
# objects are only created here; they are drawn later, in pairs.
# Each series is the full df column coerced with as.ts(); unlike the plots
# above, rows are NOT filtered on is_closed.
# NOTE(review): ggAcf/ggPacf appear to label the plot from the argument
# expression as written, so the calls are left untouched -- confirm before
# refactoring them.
# acf and pacf for food loss ---------------------------------------------------
acf_fl  <- ggAcf(as.ts(df$food_loss_kg))
pacf_fl <- ggPacf(as.ts(df$food_loss_kg))
# acf and pacf for all food waste ----------------------------------------------
acf_fw  <- ggAcf(as.ts(df$food_waste_kg))
pacf_fw <- ggPacf(as.ts(df$food_waste_kg))
# acf and pacf for solid food waste --------------------------------------------
acf_sfw  <- ggAcf(as.ts(df$solid_waste_kg))
pacf_sfw <- ggPacf(as.ts(df$solid_waste_kg))
# acf and pacf for liquid food waste -------------------------------------------
acf_lfw  <- ggAcf(as.ts(df$liquid_waste_kg))
pacf_lfw <- ggPacf(as.ts(df$liquid_waste_kg))


# Draw each ACF/PACF pair on its own page.
# Food loss
grid.arrange(grobs = list(acf_fl, pacf_fl))

# All food waste
grid.arrange(grobs = list(acf_fw, pacf_fw))

# Solid food waste
grid.arrange(grobs = list(acf_sfw, pacf_sfw))

# Liquid food waste
grid.arrange(grobs = list(acf_lfw, pacf_lfw))

Spectral Analysis

# Spectral analysis: food loss -----------------------------------------------
# plot.spectrum(dt$allWasteKg)
# Periodogram of the full food_loss_kg column (spans = 2). The result stays
# wrapped in a one-element list, so it is reached through [[1]].
raw.spec_fl <- list(spec.pgram(df$food_loss_kg, spans = 2))

# Dominant period = 1 / frequency at which the spectral estimate is largest.
with(raw.spec_fl[[1]], 1 / freq[which.max(spec)])
## [1] 3
# Spectral analysis: all food waste ------------------------------------------
# plot.spectrum(dt$allWasteKg)
# Periodogram of the full food_waste_kg column (spans = 2). The result stays
# wrapped in a one-element list, so it is reached through [[1]].
raw.spec_fw <- list(spec.pgram(df$food_waste_kg, spans = 2))

# Dominant period = 1 / frequency at which the spectral estimate is largest.
with(raw.spec_fw[[1]], 1 / freq[which.max(spec)])
## [1] 6
# Spectral analysis: solid food waste ----------------------------------------
# plot.spectrum(dt$allWasteKg)
# Periodogram of the full solid_waste_kg column (spans = 2). The result stays
# wrapped in a one-element list, so it is reached through [[1]].
raw.spec_sfw <- list(spec.pgram(df$solid_waste_kg, spans = 2))

# Dominant period = 1 / frequency at which the spectral estimate is largest.
with(raw.spec_sfw[[1]], 1 / freq[which.max(spec)])
## [1] 8.571429
# Spectral analysis: liquid food waste ---------------------------------------
# plot.spectrum(dt$allWasteKg)
# Periodogram of the full liquid_waste_kg column (spans = 2). The result stays
# wrapped in a one-element list, so it is reached through [[1]].
raw.spec_lfw <- list(spec.pgram(df$liquid_waste_kg, spans = 2))

# Dominant period = 1 / frequency at which the spectral estimate is largest.
with(raw.spec_lfw[[1]], 1 / freq[which.max(spec)])
## [1] 6

The dominant period is roughly 6 days for food waste, whereas food loss shows a cycle of approximately 3 days or 20 days.

Analysis

# Clear the workspace before the analysis section, keeping only the objects
# named "df" and "AdjMat".
rm(list = setdiff(ls(), c("df", "AdjMat")))